sourceCpp(here("src/labelling.cpp"))

# load PA district graph
map = read_rds(here("data/pa_map.rds"))
g = district_graph(map$cd_court, 18, map$adj)

# look at how accurate IS estimates are ----

pars = expand_grid(n_vtx = 5:13, 
                   n_est = 10^seq(2.5, 4, by=0.25), 
                   i = 1:8)

# make a random subgraph of `adj` of size `n`
subgraph = function(adj, n) {
    index <- rep(NA_real_, n)
    index[1] <- sample.int(length(adj), 1)
    candidates <- adj[[index[1]]] + 1
    i <- 1
    while (i < n) {
        i <- i + 1
        index[i] <- sample(candidates, 1)
        candidates <- dplyr::union(candidates, adj[[index[i]]] +
                                       1) %>% dplyr::setdiff(index)
    }
    redist.reduce.adjacency(adj, sort(index))
}

est_labs = function(n_vtx, n_est, ...) {
    g_sub = subgraph(g, n_vtx)
    
    ests = random_labelings(g_sub, sqrt(lengths(g_sub)), n_est)
    min_est = min(ests)
    ests = exp(min_est - ests)
    
    tibble(n_vtx = n_vtx,
           n_est = n_est,
           log_act = count_labelings_cpp(g_sub),
           log_est = log(mean(ests)) - min_est,
           se = sd(ests) / sqrt(n_est) / exp(min_est),
           log_se = sd(ests) / sqrt(n_est) / mean(ests))
}

res = pmap_dfr(pars, est_labs)

res |> 
    mutate(bias = log_est - log_act,
           n_vtx = fct_inorder(str_c(n_vtx, " vertices"))) |> 
ggplot(aes(n_est, bias)) +
    facet_wrap(~ n_vtx) +
    geom_hline(yintercept=0, color="red") +
    geom_pointrange(aes(ymin=bias-2*log_se, ymax=bias+2*log_se),
                    position=position_dodge2(width=0.16), 
                    fatten=0.4, linewidth=0.2, color="#222222") +
    scale_x_log10("Number of importance samples", labels=comma) +
    labs(y=expression( Bias~'in'~log~psi(bgroup("[", xi, "]")) )) +
    theme_bw(base_family="Times", base_size=10)

ggsave("figures/label_is.pdf", width=7, height=5)

        